library(data.table)
library(ggplot2)
library(beanplot) # for beanplots
library(gridExtra) # to combine ggplots together
library(grid) # to combine ggplots together
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file()) # tell RStudio to use project root directory as the root for this notebook. Needed since we are storing code in a separate directory.
Load data
# BioTime
load('data/biotime_blowes/bt_malin.Rdata')
bt <- data.table(bt_malin); rm(bt_malin)
# Temperature average, trends, and seasonality
temperature <- fread('output/temperature_byrarefyID.csv.gz')
# microclimates
microclim <- fread('output/microclimates.csv.gz', drop = 1)
# NPP
npp <- fread('output/npplandocean.csv.gz')
# Body size
bs <- fread('output/mass_byrarefyid.csv.gz', drop = 1)
bs[, ':='(STUDY_ID = NULL, REALM = NULL, taxa_mod = NULL)] # remove unnecessary columns
# Mobility
speed <- fread('output/speed_byrarefyID.csv.gz', drop = 1)
speed[, ':='(STUDY_ID = NULL, REALM = NULL, taxa_mod = NULL)] # remove unnecessary columns
# Lifespan
lsp <- fread('output/lifespan_byrarefyID.csv.gz')
# CTI
cti <- fread('output/cti_byrarefyID.csv.gz')
# consumer vs. producer
consfrac <- fread('output/consfrac_byrarefyID.csv.gz')
# richness
rich <- fread('output/richness_by_rarefyID.csv.gz') # number of species
# endotherm vs. ectotherm
endofrac <- fread('output/endofrac_byrarefyID.csv.gz') # endotherm vs. ectotherm classifications
# human impact
human <- fread('output/humanimpact_by_rarefyID.csv.gz')
# %veg
veg <- as.data.table(readRDS('output/vct_by_rarefyID.rds'))
veg[, veg := (`tree cover % (mean)` + 0.5 * `non-tree veg. % (mean)`)/100] # veg index from 0 (all non-veg) to 1 (all tree). Non-tree veg counts as 0.5.
Plot a turnover example
First is a long time-series, next one shows an example result of removing the first year of self-comparison.
#bt[, .(n = .N), by = rarefyID][n > 50,]
ggplot(bt[rarefyID == '339_1085477'], aes(YEAR, Jtu_base)) +
geom_point() +
geom_smooth(method = 'lm') +
xlab('Year') + ylab('Jaccard dissimilarity')

ggplot(bt[rarefyID == '489_16754'], aes(YEAR, Jtu_base)) +
geom_point() +
geom_smooth(method = 'lm', se = FALSE) +
xlab('Year') + ylab('Jaccard dissimilarity') +
geom_smooth(data = bt[rarefyID == '489_16754' & YEAR != 2000], method = 'lm', se = FALSE, color = 'red')

Assemble dataset of beta diversity trends (temporal turnover) and covariates
# calculate temporal turnover
calctrend <- function(y, YEAR, nm = 'y'){ # function to calc trends
# turn off warnings for the following
defaultW <- getOption("warn")
options(warn = -1)
mod <- lm(y ~ YEAR)
out <- list(y = coef(mod)[2], # coef for the slope
y_se = sqrt(diag(vcov(mod)))[2]) # SE
names(out) <- c(nm, paste0(nm, '_se'))
options(warn = defaultW)
return(out)
}
# function to calc trends, removing first year with 0
calctrendrem0 <- function(y, YEAR, nm = 'y'){
# turn off warnings for the following
defaultW <- getOption("warn")
options(warn = -1)
if(length(YEAR)>2){
o <- order(YEAR)
YEAR2 <- YEAR[o][2:length(YEAR)]
y2 <- y[o][2:length(y)]
if(sum(!is.na(y2)) >= 2){ # make sure enough values to fit a line
mod <- lm(y2 ~ YEAR2)
out <- list(y = coef(mod)[2], # coef for the slope
y_se = sqrt(diag(vcov(mod)))[2]) # SE
names(out) <- c(nm, paste0(nm, '_se'))
options(warn = defaultW)
return(out)
} else {
out <- list(y = NA_real_, y_se = NA_real_)
names(out) <- c(nm, paste0(nm, '_se'))
options(warn = defaultW)
return(out)
}
} else {
out <- list(y = NA_real_, y_se = NA_real_)
names(out) <- c(nm, paste0(nm, '_se'))
options(warn = defaultW)
return(out)
}
}
calcfirstlast <- function(y, YEAR, nm = 'y'){ # function to get distance from last to first year
# turn off warnings for the following
defaultW <- getOption("warn")
options(warn = -1)
if(length(YEAR)>1){
o <- order(YEAR)
YEAR2 <- YEAR[o][2:length(YEAR)]
y2 <- y[o][2:length(y)]
out <- list(y = y2[length(y2)], # dissimilarity for last year
y_se = NA_real_)
} else {
out <- list(y = NA_real_, y_se = NA_real_)
}
names(out) <- c(nm, paste0(nm, '_se'))
options(warn = defaultW)
return(out)
}
setkey(bt, STUDY_ID, rarefyID, YEAR)
if(file.exists('temp/trendstemp.rds')){
trends <- readRDS('temp/trendstemp.rds')
} else {
trends1 <- bt[, calctrend(Jtu_base, YEAR, 'Jtutrend'),
by = .(REALM, Biome, taxa_mod, STUDY_ID, rarefyID, rarefyID_x, rarefyID_y)] # calculate trend in Jaccard turnover from first year, plus SEs
trends2 <- bt[, calctrend(Jbeta_base, YEAR, 'Jbetatrend'),
by = .(rarefyID)] # calculate trend in total Jaccard' beta diversity's from first year,
trends3 <- bt[, calctrend(1-Horn_base, YEAR, 'Horntrend'),
by = .(rarefyID)] # calculate trend in Horn-Morisita from first year. Convert to dissimilarity.
trends4 <- bt[, .(Strend = coef(lm(I(log(S)) ~ YEAR))[2]), by = .(rarefyID)] # trend in log(S)
trends5 <- bt[, calctrendrem0(Jtu_base, YEAR, 'Jtutrendrem0'),
by = .(rarefyID)] # calculate trend in Jaccard turnover without first year
trends6 <- bt[, calctrendrem0(Jbeta_base, YEAR, 'Jbetatrendrem0'),
by = .(rarefyID)]
trends7 <- bt[, calctrendrem0(1-Horn_base, YEAR, 'Horntrendrem0'),
by = .(rarefyID)]
trends8 <- bt[, calcfirstlast(Jtu_base, YEAR, 'Jtulast'),
by = .(rarefyID)]
trends9 <- bt[, calcfirstlast(Jbeta_base, YEAR, 'Jbetalast'),
by = .(rarefyID)]
trends10 <- bt[, calcfirstlast(1-Horn_base, YEAR, 'Hornlast'),
by = .(rarefyID)]
nyrBT <- bt[, .(nyrBT = length(YEAR),
minyrBT = min(YEAR),
maxyrBT = max(YEAR),
medianyrBT = median(YEAR),
meanyrBT = mean(YEAR)),
by = .(rarefyID)] # number of years in time-series
trends <- merge(trends1, trends2) # merge in total J and Horn-Morisita
trends <- merge(trends, trends3)
trends <- merge(trends, trends5)
trends <- merge(trends, trends6)
trends <- merge(trends, trends7)
trends <- merge(trends, trends8)
trends <- merge(trends, trends9)
trends <- merge(trends, trends10)
trends <- merge(trends, nyrBT)
saveRDS(trends, file = 'temp/trendstemp.rds')
}
Add covariates
# add covariates
trends <- merge(trends, temperature[, .(rarefyID, tempave, tempave_metab, temptrend, seas)], all.x = TRUE, by = 'rarefyID') # temperature ave, ave metabolic, trend, and seasonality
trends <- merge(trends, microclim[, .(rarefyID, microclim = Temp_sd20km)], all.x = TRUE, by = 'rarefyID') # microclimates
trends <- merge(trends, npp, all.x = TRUE, by = 'rarefyID') # npp
trends <- merge(trends, bs[, .(rarefyID, mass_mean_weight, mass_sd_weight)], all.x = TRUE) # body size mass (g)
trends <- merge(trends, speed[, .(rarefyID, speed_mean_weight, speed_sd_weight)], all.x = TRUE) # speed (km/hr)
trends <- merge(trends, lsp[, .(rarefyID, lifespan_mean_weight, lifespan_sd_weight)], all.x = TRUE) # lifespan (yr)
trends <- merge(trends, cti[, .(rarefyID, thermal_bias)], all.x = TRUE) # thermal bias (degC)
trends <- merge(trends, consfrac[, .(rarefyID, consfrac)], all.x = TRUE) # fraction consumers
trends <- merge(trends, rich, all.x = TRUE) # species richness
trends <- merge(trends, endofrac[, .(rarefyID, endofrac)], all.x = TRUE) # endotherm vs. ectotherm
trends <- merge(trends, human[, .(rarefyID, human_bowler = atc, human_venter = hfp, human_halpern = himp)], all.x = TRUE) # human impact
trends <- merge(trends, veg[, .(rarefyID, veg = veg)], all.x = TRUE) # vegetation index
trends[REALM == 'Marine', veg := 0] # veg index is 0 at sea
Do some basic checks of the turnover calculations
Histograms of temporal change
# basic checks
trends
x <- trends[, hist(Jtutrend)]

x <- trends[, hist(Jbetatrend)]

x <- trends[, hist(Horntrend)]

x <- trends[, hist(Jtutrendrem0)]

x <- trends[, hist(Jbetatrendrem0)]

x <- trends[, hist(Horntrendrem0)]

x <- trends[, hist(Jtulast)]

x <- trends[, hist(Jbetalast)]

x <- trends[, hist(Hornlast)]

Change compared to number of years in time-series
# number of year
trends[, summary(nyrBT)]
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 2.000 4.000 5.519 7.000 97.000
x <- trends[, hist(nyrBT)]

trends[, plot(nyrBT, Jtutrend, log = 'x', col = '#00000033')]
NULL

trends[, plot(nyrBT, Jtutrendrem0, log = 'x', col = '#00000033')]
NULL

trends[, plot(nyrBT, Jtulast, log = 'x', col = '#00000033')]
NULL

Change compared to number of species in time-series
# number of species
trends[, summary(Nspp)]
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 6.00 11.00 19.35 24.00 1427.00
x <- trends[, hist(Nspp)]

trends[, plot(Nspp, Jtutrend, log = 'x', col = '#00000033')]
NULL

trends[, plot(Nspp, Jtutrendrem0, log = 'x', col = '#00000033')]
NULL

trends[, plot(Nspp, Jtulast, log = 'x', col = '#00000033')]
NULL

Remove studies with only 1 species
nrow(trends)
trends <- trends[Nspp > 1, ]
nrow(trends)
Turnover calculations are correlated, though less so for Horn
# are turnover calculations correlated?
ggplot(trends, aes(Jbetatrendrem0, Jtutrendrem0)) +
geom_point(alpha = 0.3) +
geom_smooth()
ggplot(trends, aes(Jbetatrendrem0, Horntrendrem0)) +
geom_point(alpha = 0.3) +
geom_smooth()
Temporal turnover is not all that correlated between including first year or not.
ggplot(trends, aes(Jtutrend, Jtutrendrem0)) +
geom_point(alpha = 0.3) +
geom_smooth() +
geom_abline(a = 0, b = 1)
ggplot(trends, aes(Jbetatrend, Jbetatrendrem0)) +
geom_point(alpha = 0.3) +
geom_smooth() +
geom_abline(a = 0, b = 1)
ggplot(trends, aes(Horntrend, Horntrendrem0)) +
geom_point(alpha = 0.3) +
geom_smooth() +
geom_abline(a = 0, b = 1)
Compare covariates across realms
i <- trends[, !duplicated(rarefyID)]; sum(i)
par(mfrow=c(5,3))
beanplot(rarefyID_y ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Latitude (degN)', ll = 0.05)
beanplot(tempave ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Temperature (degC)', ll = 0.05)
beanplot(tempave_metab ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Metabolic Temperature (degC)', ll = 0.05, bw = 'nrd0') # nrd0 bandwidth to calculation gap
beanplot(seas ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Seasonality (degC)', ll = 0.05)
beanplot(microclim ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Microclimates (degC)', ll = 0.05)
beanplot(temptrend ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Temperature trend (degC/yr)', ll = 0.05)
beanplot(mass_mean_weight ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Mass (g)', ll = 0.05, log = 'y')
beanplot(speed_mean_weight +1 ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Speed (km/hr)', ll = 0.05, log = 'y')
beanplot(lifespan_mean_weight ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Lifespan (yr)', ll = 0.05, log = 'y')
#beanplot(consfrac ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Consumers (fraction)', ll = 0.05, log = '') # too sparse
#beanplot(endofrac ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Endotherms (fraction)', ll = 0.05, log = '') # too sparse
beanplot(Nspp ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Number of species', ll = 0.05, log = 'y')
beanplot(thermal_bias ~ REALM, data = trends[i & !is.na(thermal_bias),], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Thermal bias (degC)', ll = 0.05)
beanplot(npp ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'NPP', ll = 0.05)
beanplot(veg ~ REALM, data = trends[i & REALM !='Marine',], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'NPP', ll = 0.05)
Marine are in generally warmer locations (seawater doesn’t freeze) Marine have much lower seasonality. Marine and freshwater have some very small masses (plankton), but much of dataset is similar to terrestrial. Marine has a lot of slow, crawling organisms, but land has plants. Land also has birds (fast).
Plot turnover vs. temperature
Time-series length and temperature trend?
ggplot(trends, aes(temptrend, nyrBT)) +
geom_point() +
geom_smooth() +
scale_y_log10()
Plot turnover vs. explanatory variables
Lines are ggplot smoother fits.
Strong trends with temperature change, but trends are pretty symmetric around no trend in temperature, which implies warming or cooling drives similar degree of community turnover. Some indication of less turnover for larger organisms (mass) Higher turnover on land with higher seasonality? More turnover for shorter-lived organisms? No really clear differences among realms.
Write out
write.csv(trends, gzfile('output/turnover_w_covariates.csv.gz'), row.names = FALSE)
Useful variables
# realm that combined Terrestrial and Freshwater, for interacting with human impact
trends[, REALM2 := REALM]
levels(trends$REALM2) = list(TerrFresh = "Freshwater", TerrFresh = "Terrestrial", Marine = "Marine")
# group Marine invertebrates/plants in with All
trends[, taxa_mod2 := taxa_mod]
trends[taxa_mod == 'Marine invertebrates/plants', taxa_mod2 := 'All']
Do the variables look ok?
Unscaled
# histograms to examine
cexmain = 0.6
par(mfrow = c(5,4))
invisible(trends[, hist(minyrBT, main = 'Start year', cex.main = cexmain)])
invisible(trends[, hist(maxyrBT - minyrBT, main = 'Duration (years)', cex.main = cexmain)])
invisible(trends[, hist(nyrBT, main = 'Number of sampled years', cex.main = cexmain)])
invisible(trends[, hist(mass_mean_weight, main = 'Mass (g)', cex.main = cexmain)])
invisible(trends[, hist(speed_mean_weight, main = 'Speed (km/hr)', cex.main = cexmain)])
invisible(trends[, hist(lifespan_mean_weight, main = 'Lifespan (yr)', cex.main = cexmain)])
invisible(trends[, hist(tempave_metab, main = 'Metabolic temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(consfrac, main = 'Consumers (fraction)', cex.main = cexmain)])
invisible(trends[, hist(endofrac, main = 'Endotherms (fraction)', cex.main = cexmain)])
invisible(trends[, hist(tempave, main = 'Environmental temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(temptrend, main = 'Temperature trend (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(seas, main = 'Seasonality (°C)', cex.main = cexmain)])
invisible(trends[, hist(microclim, main = 'Microclimates (°C)', cex.main = cexmain)])
invisible(trends[, hist(Nspp, main = 'Species richness', cex.main = cexmain)])
invisible(trends[, hist(thermal_bias, main = 'Thermal bias (°C)', cex.main = cexmain)])
invisible(trends[, hist(npp, main = 'Net primary productivity', cex.main = cexmain)])
invisible(trends[, hist(veg, main = 'Vegetation index', cex.main = cexmain)])
invisible(trends[, hist(human_bowler, main = 'Human impact score (Bowler)', cex.main = cexmain)])
invisible(trends[, hist(human_venter, main = 'Human impact score (Venter)', cex.main = cexmain)])
invisible(trends[, hist(human_halpern, main = 'Human impact score (Halpern)', cex.main = cexmain)])
Scaled
# histograms to examine
cexmain = 0.6
par(mfrow = c(5,4))
invisible(trends[, hist(tempave.sc, main = 'Environmental temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(tempave_metab.sc, main = 'Metabolic temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(seas.sc, main = 'Seasonality (°C)', cex.main = cexmain)])
invisible(trends[, hist(microclim.sc, main = 'log Microclimates (°C)', cex.main = cexmain)])
invisible(trends[, hist(temptrend.sc, main = 'Temperature trend (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(temptrend_abs.sc, main = 'abs(Temperature trend) (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(mass.sc, main = 'log Mass (g)', cex.main = cexmain)])
invisible(trends[, hist(speed.sc, main = 'log Speed (km/hr)', cex.main = cexmain)])
invisible(trends[, hist(lifespan.sc, main = 'log Lifespan (yr)', cex.main = cexmain)])
invisible(trends[, hist(consumerfrac.sc, main = 'Consumers (fraction)', cex.main = cexmain)])
invisible(trends[, hist(endothermfrac.sc, main = 'Endotherms (fraction)', cex.main = cexmain)])
invisible(trends[, hist(nspp.sc, main = 'log Species richness', cex.main = cexmain)])
invisible(trends[, hist(thermal_bias.sc, main = 'Thermal bias (°C)', cex.main = cexmain)])
invisible(trends[, hist(npp.sc, main = 'log Net primary productivity', cex.main = cexmain)])
invisible(trends[, hist(veg.sc, main = 'log Vegetation index', cex.main = cexmain)])
invisible(trends[, hist(human_bowler.sc, main = 'log Human impact score (Bowler)', cex.main = cexmain)])
invisible(trends[, hist(human_footprint.sc, main = 'log Human impact score (Venter & Halpern)', cex.main = cexmain)])
Check correlations among variables. Pearson’s r is on the lower diagonal.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y, use = 'pairwise.complete.obs')
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, txt)
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt) #, cex = cex.cor * r)
}
pairs(formula = ~ tempave.sc + tempave_metab.sc + seas.sc + microclim.sc + temptrend.sc + temptrend_abs.sc + mass.sc + speed.sc + lifespan.sc + consumerfrac.sc + endothermfrac.sc + nspp.sc + thermal_bias.sc + npp.sc + veg.sc + human_bowler.sc + human_footprint.sc, data = trends, gap = 1/10, cex = 0.2, col = '#00000022', lower.panel = panel.cor)
Mass and lifespan look tightly correlated, but r only 0.56…? Tempave_metab and lifespan don’t look tightly correlated, but r= -0.81 Tempave_metab and speed don’t look tightly correlated, but r= -0.83 Lifespan and speed don’t look tightly correlated, but r = 0.73
---
title: "Turnover covariate data prep"
output:
  github_document: default
  html_notebook: default
---

```{r setup}
library(data.table)
library(ggplot2)
library(beanplot) # for beanplots
library(gridExtra) # to combine ggplots together
library(grid) # to combine ggplots together


knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file()) # tell RStudio to use project root directory as the root for this notebook. Needed since we are storing code in a separate directory.
```

### Load data
```{r load data}
# BioTime
load('data/biotime_blowes/bt_malin.Rdata')
bt <- data.table(bt_malin); rm(bt_malin)

# Temperature average, trends, and seasonality
temperature <- fread('output/temperature_byrarefyID.csv.gz')

# microclimates
microclim <- fread('output/microclimates.csv.gz', drop = 1)

# NPP
npp <- fread('output/npplandocean.csv.gz')

# Body size
bs <- fread('output/mass_byrarefyid.csv.gz', drop = 1)
bs[, ':='(STUDY_ID = NULL, REALM = NULL, taxa_mod = NULL)] # remove unnecessary columns 

# Mobility
speed <- fread('output/speed_byrarefyID.csv.gz', drop = 1)
speed[, ':='(STUDY_ID = NULL, REALM = NULL, taxa_mod = NULL)] # remove unnecessary columns 

# Lifespan
lsp <- fread('output/lifespan_byrarefyID.csv.gz')

# CTI
cti <- fread('output/cti_byrarefyID.csv.gz')
    
# consumer vs. producer
consfrac <- fread('output/consfrac_byrarefyID.csv.gz')

# richness
rich <- fread('output/richness_by_rarefyID.csv.gz') # number of species

# endotherm vs. ectotherm
endofrac <- fread('output/endofrac_byrarefyID.csv.gz') # endotherm vs. ectotherm classifications

# human impact
human <- fread('output/humanimpact_by_rarefyID.csv.gz')

# %veg
veg <- as.data.table(readRDS('output/vct_by_rarefyID.rds'))
veg[, veg := (`tree cover % (mean)` + 0.5 * `non-tree veg. % (mean)`)/100] # veg index from 0 (all non-veg) to 1 (all tree). Non-tree veg counts as 0.5.
```

### Plot a turnover example
First is a long time-series, next one shows an example result of removing the first year of self-comparison.
```{r plot turnover}
#bt[, .(n = .N), by = rarefyID][n > 50,]

ggplot(bt[rarefyID == '339_1085477'], aes(YEAR, Jtu_base)) +
    geom_point() +
    geom_smooth(method = 'lm') +
    xlab('Year') + ylab('Jaccard dissimilarity')

ggplot(bt[rarefyID == '489_16754'], aes(YEAR, Jtu_base)) +
    geom_point() +
    geom_smooth(method = 'lm', se = FALSE) +
    xlab('Year') + ylab('Jaccard dissimilarity') +
  geom_smooth(data = bt[rarefyID == '489_16754' & YEAR != 2000], method = 'lm', se = FALSE, color = 'red')
```

### Assemble dataset of beta diversity trends (temporal turnover) and covariates
```{r assemble trends, warning=FALSE, message=FALSE}
# calculate temporal turnover
calctrend <- function(y, YEAR, nm = 'y'){ # function to calc trends
  # turn off warnings for the following
  defaultW <- getOption("warn")
  options(warn = -1)
  
  mod <- lm(y ~ YEAR)
  out <- list(y = coef(mod)[2], # coef for the slope
              y_se = sqrt(diag(vcov(mod)))[2]) # SE
  names(out) <- c(nm, paste0(nm, '_se'))
  options(warn = defaultW)
  return(out)
}

# function to calc trends, removing first year with 0
calctrendrem0 <- function(y, YEAR, nm = 'y'){
  # turn off warnings for the following
  defaultW <- getOption("warn")
  options(warn = -1)
  
  if(length(YEAR)>2){
    o <- order(YEAR)
    YEAR2 <- YEAR[o][2:length(YEAR)]
    y2 <- y[o][2:length(y)]
    
    if(sum(!is.na(y2)) >= 2){ # make sure enough values to fit a line
      mod <- lm(y2 ~ YEAR2)
      out <- list(y = coef(mod)[2], # coef for the slope
                  y_se = sqrt(diag(vcov(mod)))[2]) # SE
      names(out) <- c(nm, paste0(nm, '_se'))
      options(warn = defaultW)
      return(out)
      
    } else {
      out <- list(y = NA_real_, y_se = NA_real_)
      names(out) <- c(nm, paste0(nm, '_se'))
      
      options(warn = defaultW)
      return(out)
    }
    
  } else {
    out <- list(y = NA_real_, y_se = NA_real_)
    names(out) <- c(nm, paste0(nm, '_se'))
  
    options(warn = defaultW)
    return(out)
  }
  

}

calcfirstlast <- function(y, YEAR, nm = 'y'){ # function to get distance from last to first year
  # turn off warnings for the following
  defaultW <- getOption("warn")
  options(warn = -1)
  
  if(length(YEAR)>1){
    o <- order(YEAR)
    YEAR2 <- YEAR[o][2:length(YEAR)]
    y2 <- y[o][2:length(y)]
    
    out <- list(y = y2[length(y2)], # dissimilarity for last year
                y_se = NA_real_)
  } else {
    out <- list(y = NA_real_, y_se = NA_real_)
  }

  names(out) <- c(nm, paste0(nm, '_se'))
  options(warn = defaultW)
  return(out)
}



setkey(bt, STUDY_ID, rarefyID, YEAR)

if(file.exists('temp/trendstemp.rds')){
  trends <- readRDS('temp/trendstemp.rds')
} else {
  
  trends1 <- bt[, calctrend(Jtu_base, YEAR, 'Jtutrend'),
                by = .(REALM, Biome, taxa_mod, STUDY_ID, rarefyID, rarefyID_x, rarefyID_y)] # calculate trend in Jaccard turnover from first year, plus SEs
  trends2 <- bt[, calctrend(Jbeta_base, YEAR, 'Jbetatrend'),
                by = .(rarefyID)] # calculate trend in total Jaccard' beta diversity's from first year,
  trends3 <- bt[, calctrend(1-Horn_base, YEAR, 'Horntrend'),
                by = .(rarefyID)] # calculate trend in Horn-Morisita from first year. Convert to dissimilarity.
  trends4 <- bt[, .(Strend = coef(lm(I(log(S)) ~ YEAR))[2]), by = .(rarefyID)] # trend in log(S)
  
  trends5 <- bt[, calctrendrem0(Jtu_base, YEAR, 'Jtutrendrem0'), 
                by = .(rarefyID)] # calculate trend in Jaccard turnover without first year
  trends6 <- bt[, calctrendrem0(Jbeta_base, YEAR, 'Jbetatrendrem0'),
                by = .(rarefyID)]
  trends7 <- bt[, calctrendrem0(1-Horn_base, YEAR, 'Horntrendrem0'),
                by = .(rarefyID)]
  
  trends8 <- bt[, calcfirstlast(Jtu_base, YEAR, 'Jtulast'),
                by = .(rarefyID)]
  trends9 <- bt[, calcfirstlast(Jbeta_base, YEAR, 'Jbetalast'),
                by = .(rarefyID)]
  trends10 <- bt[, calcfirstlast(1-Horn_base, YEAR, 'Hornlast'),
                 by = .(rarefyID)]
  
  
  nyrBT <-  bt[, .(nyrBT = length(YEAR),
                   minyrBT = min(YEAR),
                   maxyrBT = max(YEAR),
                   medianyrBT = median(YEAR),
                   meanyrBT = mean(YEAR)),
               by = .(rarefyID)] # number of years in time-series
  
  trends <- merge(trends1, trends2) # merge in total J and Horn-Morisita
  trends <- merge(trends, trends3)
  trends <- merge(trends, trends5)
  trends <- merge(trends, trends6)
  trends <- merge(trends, trends7)
  trends <- merge(trends, trends8)
  trends <- merge(trends, trends9)
  trends <- merge(trends, trends10)
  trends <- merge(trends, nyrBT)
  
  saveRDS(trends, file = 'temp/trendstemp.rds')
  
}
```

Add covariates
```{r add covariate data}
# add covariates
trends <- merge(trends, temperature[, .(rarefyID, tempave, tempave_metab, temptrend, seas)], all.x = TRUE, by = 'rarefyID') # temperature ave, ave metabolic, trend, and seasonality
trends <- merge(trends, microclim[, .(rarefyID, microclim = Temp_sd20km)], all.x = TRUE, by = 'rarefyID') # microclimates
trends <- merge(trends, npp, all.x = TRUE, by = 'rarefyID') # npp
trends <- merge(trends, bs[, .(rarefyID, mass_mean_weight, mass_sd_weight)], all.x = TRUE) # body size mass (g)
trends <- merge(trends, speed[, .(rarefyID, speed_mean_weight, speed_sd_weight)], all.x = TRUE) # speed (km/hr)
trends <- merge(trends, lsp[, .(rarefyID, lifespan_mean_weight, lifespan_sd_weight)], all.x = TRUE) # lifespan (yr)
trends <- merge(trends, cti[, .(rarefyID, thermal_bias)], all.x = TRUE) # thermal bias (degC)
trends <- merge(trends, consfrac[, .(rarefyID, consfrac)], all.x = TRUE) # fraction consumers
trends <- merge(trends, rich, all.x = TRUE) # species richness
trends <- merge(trends, endofrac[, .(rarefyID, endofrac)], all.x = TRUE) # endotherm vs. ectotherm
trends <- merge(trends, human[, .(rarefyID, human_bowler = atc, human_venter = hfp, human_halpern = himp)], all.x = TRUE) # human impact
trends <- merge(trends, veg[, .(rarefyID, veg = veg)], all.x = TRUE) # vegetation index
trends[REALM == 'Marine', veg := 0] # veg index is 0 at sea
```

### Do some basic checks of the turnover calculations
#### Histograms of temporal change
```{r histograms of change, messages = FALSE}
# basic checks
trends

x <- trends[, hist(Jtutrend)]
x <- trends[, hist(Jbetatrend)]
x <- trends[, hist(Horntrend)]

x <- trends[, hist(Jtutrendrem0)]
x <- trends[, hist(Jbetatrendrem0)]
x <- trends[, hist(Horntrendrem0)]

x <- trends[, hist(Jtulast)]
x <- trends[, hist(Jbetalast)]
x <- trends[, hist(Hornlast)]

```

#### Change compared to number of years in time-series
```{r change vs. num years}
# number of year
trends[, summary(nyrBT)]
x <- trends[, hist(nyrBT)]
trends[, plot(nyrBT, Jtutrend, log = 'x', col = '#00000033')]
trends[, plot(nyrBT, Jtutrendrem0, log = 'x', col = '#00000033')]
trends[, plot(nyrBT, Jtulast, log = 'x', col = '#00000033')]
```

#### Change compared to number of species in time-series
```{r change vs. number of species}
# number of species
trends[, summary(Nspp)]
x <- trends[, hist(Nspp)]
trends[, plot(Nspp, Jtutrend, log = 'x', col = '#00000033')]
trends[, plot(Nspp, Jtutrendrem0, log = 'x', col = '#00000033')]
trends[, plot(Nspp, Jtulast, log = 'x', col = '#00000033')]
```

Remove studies with only 1 species
```{r remove 1-spp studies}
nrow(trends)
trends <- trends[Nspp > 1, ]
nrow(trends)
```

Turnover calculations are correlated, though less so for Horn
```{r basic pairwise graphs of turnover metrics}
# are turnover calculations correlated?
ggplot(trends, aes(Jbetatrendrem0, Jtutrendrem0)) +
    geom_point(alpha = 0.3) +
    geom_smooth()

ggplot(trends, aes(Jbetatrendrem0, Horntrendrem0)) +
    geom_point(alpha = 0.3) +
    geom_smooth()

```


Temporal turnover is not all that correlated between including first year or not.
```{r turnover metrics with and without first year}
ggplot(trends, aes(Jtutrend, Jtutrendrem0)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  geom_abline(a = 0, b = 1)

ggplot(trends, aes(Jbetatrend, Jbetatrendrem0)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  geom_abline(a = 0, b = 1)

ggplot(trends, aes(Horntrend, Horntrendrem0)) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  geom_abline(a = 0, b = 1)

```

## Compare covariates across realms
```{r compare across realms, fig.height=12, fig.width=9}
i <- trends[, !duplicated(rarefyID)]; sum(i)
par(mfrow=c(5,3))
beanplot(rarefyID_y ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Latitude (degN)', ll = 0.05)
beanplot(tempave ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Temperature (degC)', ll = 0.05)
beanplot(tempave_metab ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Metabolic Temperature (degC)', ll = 0.05, bw = 'nrd0') # nrd0 bandwidth to calculation gap
beanplot(seas ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Seasonality (degC)', ll = 0.05)
beanplot(microclim ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Microclimates (degC)', ll = 0.05)
beanplot(temptrend ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Temperature trend (degC/yr)', ll = 0.05)
beanplot(mass_mean_weight ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Mass (g)', ll = 0.05, log = 'y')
beanplot(speed_mean_weight +1 ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Speed (km/hr)', ll = 0.05, log = 'y')
beanplot(lifespan_mean_weight ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Lifespan (yr)', ll = 0.05, log = 'y')
#beanplot(consfrac ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Consumers (fraction)', ll = 0.05, log = '') # too sparse
#beanplot(endofrac ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Endotherms (fraction)', ll = 0.05, log = '') # too sparse
beanplot(Nspp ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Number of species', ll = 0.05, log = 'y')
beanplot(thermal_bias ~ REALM, data = trends[i & !is.na(thermal_bias),], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'Thermal bias (degC)', ll = 0.05)
beanplot(npp ~ REALM, data = trends[i,], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'NPP', ll = 0.05)
beanplot(veg ~ REALM, data = trends[i & REALM !='Marine',], what = c(1,1,1,1), col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", ylab = 'NPP', ll = 0.05)

```

Marine are in generally warmer locations (seawater doesn't freeze)
Marine have much lower seasonality.
Marine and freshwater have some very small masses (plankton), but much of dataset is similar to terrestrial.
Marine has a lot of slow, crawling organisms, but land has plants. Land also has birds (fast).






## Plot turnover vs. temperature
```{r plot turnover vs temp trend, echo=FALSE, fig.height = 8, fig.width = 9,}
p1 <- ggplot(trends, aes(temptrend, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/yr)', y = 'Jaccard turnover temporal trend', title = 'All')

# Jaccard total trend vs. temperature trend (across all years)
p2 <- ggplot(trends, aes(temptrend, Jbetatrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Jaccard total temporal trend')

# Horn-Morisita turnover trend vs. temperature trend (across all years)
p3 <- ggplot(trends, aes(temptrend, Horntrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Morisita-Horn temporal turnover')

# same, but only ts >= 5 yrs
p4 <- ggplot(trends[nyrBT >= 5, ], aes(temptrend, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/yr)', y = 'Jaccard turnover temporal trend', title = '>= 5 yrs')

p5 <- ggplot(trends[nyrBT >= 5, ], aes(temptrend, Jbetatrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Jaccard total temporal trend')

p6 <- ggplot(trends[nyrBT >= 5, ], aes(temptrend, Horntrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Morisita-Horn temporal turnover')

# same, but removing year 1
p7 <- ggplot(trends, aes(temptrend, Jtutrendrem0, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/yr)', y = 'Jaccard turnover temporal trend w/out year 1', title = 'Without year 1')

p8 <- ggplot(trends, aes(temptrend, Jbetatrendrem0, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Jaccard total temporal trend w/out year 1')

p9 <- ggplot(trends, aes(temptrend, Horntrendrem0, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature trend (°C/year)', y = 'Morisita-Horn temporal turnover w/out year 1')


grid.arrange(p1, p4, p7, p2, p5, p8, p3, p6, p9, ncol = 3)
```

### Time-series length and temperature trend?
```{r ts length and temp trend}
ggplot(trends, aes(temptrend, nyrBT)) +
  geom_point() +
  geom_smooth() +
  scale_y_log10()
```

## Plot turnover vs. explanatory variables
Lines are ggplot smoother fits.
```{r plot turnover v explanatory vars, echo=FALSE, fig.height = 16, fig.width = 9,}

p1 <- ggplot(trends, aes(REALM, Jtutrend)) +
  geom_boxplot(na.rm = TRUE) + 
  labs(x = 'Realm', y = 'Jaccard turnover temporal trend')

p2 <- ggplot(trends, aes(tempave, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Temperature (°C)', y = 'Jaccard turnover temporal trend')

p3 <- ggplot(trends, aes(tempave_metab, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Metabolic temperature (°C)', y = 'Jaccard turnover temporal trend')

p4 <- ggplot(trends, aes(seas, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Seasonality (°C)', y = 'Jaccard turnover temporal trend')

p5 <- ggplot(trends, aes(microclim, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Microclimate availability (°C)', y = 'Jaccard turnover temporal trend')

p6 <- ggplot(trends, aes(mass_mean_weight, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Mass (g))', y = 'Jaccard turnover temporal trend')

p7 <- ggplot(trends, aes(speed_mean_weight+1, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Speed (km / hr))', y = 'Jaccard turnover temporal trend')

p8 <- ggplot(trends, aes(lifespan_mean_weight, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Lifespan (yr))', y = 'Jaccard turnover temporal trend')

p9 <- ggplot(trends, aes(consfrac, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 3), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Fraction consumers', y = 'Jaccard turnover temporal trend')

p10 <- ggplot(trends, aes(endofrac, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 3), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Fraction endotherms', y = 'Jaccard turnover temporal trend')

p11 <- ggplot(trends, aes(Nspp, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Number of species', y = 'Jaccard turnover temporal trend')

p12 <- ggplot(trends, aes(thermal_bias, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Thermal bias (°C)', y = 'Jaccard turnover temporal trend')

p13 <- ggplot(trends, aes(npp, Jtutrend, size = nyrBT)) +
  geom_point(aes(color = REALM), size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs"), na.rm = TRUE, color = 'black') +
  scale_color_brewer(palette="Set1") + 
  scale_x_log10() +
  labs(x = 'Net primary productivity (mg C / m2 / day)', y = 'Jaccard turnover temporal trend')

p14 <- ggplot(trends, aes(veg, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 5), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Vegetation score', y = 'Jaccard turnover temporal trend')

p15 <- ggplot(trends, aes(human_bowler, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 5), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'Bowler human impact score', y = 'Jaccard turnover temporal trend')

p16 <- ggplot(trends, aes(human_venter, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 5), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'scaled Human impact score (Venter)', y = 'Jaccard turnover temporal trend')

p17 <- ggplot(trends, aes(human_halpern, Jtutrend, color = REALM, size = nyrBT)) +
  geom_point(size = 0.2, alpha = 0.5, na.rm = TRUE) + 
  geom_smooth(method = 'gam', formula = y ~ s(x, bs = "cs", k = 5), na.rm = TRUE) +
  scale_color_brewer(palette="Set1") + 
  labs(x = 'scaled Human impact score (Halpern)', y = 'Jaccard turnover temporal trend')


grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, ncol = 2)
```

Strong trends with temperature change, but trends are pretty symmetric around no trend in temperature, which implies warming or cooling drives similar degree of community turnover.
Some indication of less turnover for larger organisms (mass)
Higher turnover on land with higher seasonality?
More turnover for shorter-lived organisms?
No really clear differences among realms.

### Write out
```{r write out}
write.csv(trends, gzfile('output/turnover_w_covariates.csv.gz'), row.names = FALSE)
```


### Useful variables
```{r useful vars}
# realm that combined Terrestrial and Freshwater, for interacting with human impact
trends[, REALM2 := REALM]
levels(trends$REALM2) = list(TerrFresh = "Freshwater", TerrFresh = "Terrestrial", Marine = "Marine")

# group Marine invertebrates/plants in with All
trends[, taxa_mod2 := taxa_mod]
trends[taxa_mod == 'Marine invertebrates/plants', taxa_mod2 := 'All']
```


### Log-transform some variables, then center and scale. 
``` {r center and scale}
trends[, tempave.sc := scale(tempave)]
trends[, tempave_metab.sc := scale(tempave_metab)]
trends[, seas.sc := scale(seas)]
trends[, microclim.sc := scale(log(microclim))]
trends[, temptrend.sc := scale(temptrend, center = FALSE)]
trends[, temptrend_abs.sc := scale(abs(temptrend), center = FALSE)] # do not center, so that 0 is still 0 temperature change
trends[, mass.sc := scale(log(mass_mean_weight))]
trends[, speed.sc := scale(log(speed_mean_weight+1))]
trends[, lifespan.sc := scale(log(lifespan_mean_weight))]
trends[, consumerfrac.sc := scale(consfrac)]
trends[, endothermfrac.sc := scale(endofrac)]
trends[, nspp.sc := scale(log(Nspp))]
trends[, thermal_bias.sc := scale(thermal_bias)]
trends[, npp.sc := scale(log(npp))]
trends[, veg.sc := scale(log(veg+1))]
trends[, human_bowler.sc := scale(log(human_bowler+1)), by = REALM2] # separate scaling by realm
trends[REALM2 == 'TerrFresh', human_footprint.sc := scale(log(human_venter+1))]
trends[REALM2 == 'Marine', human_footprint.sc := scale(log(human_halpern))]
```


### Do the variables look ok?
#### Unscaled
```{r histograms unscaled, fig.height = 9}
# histograms to examine
cexmain = 0.6
par(mfrow = c(5,4))
invisible(trends[, hist(minyrBT, main = 'Start year', cex.main = cexmain)])
invisible(trends[, hist(maxyrBT - minyrBT, main = 'Duration (years)', cex.main = cexmain)])
invisible(trends[, hist(nyrBT, main = 'Number of sampled years', cex.main = cexmain)])
invisible(trends[, hist(mass_mean_weight, main = 'Mass (g)', cex.main = cexmain)])
invisible(trends[, hist(speed_mean_weight, main = 'Speed (km/hr)', cex.main = cexmain)])
invisible(trends[, hist(lifespan_mean_weight, main = 'Lifespan (yr)', cex.main = cexmain)])
invisible(trends[, hist(tempave_metab, main = 'Metabolic temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(consfrac, main = 'Consumers (fraction)', cex.main = cexmain)])
invisible(trends[, hist(endofrac, main = 'Endotherms (fraction)', cex.main = cexmain)])
invisible(trends[, hist(tempave, main = 'Environmental temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(temptrend, main = 'Temperature trend (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(seas, main = 'Seasonality (°C)', cex.main = cexmain)])
invisible(trends[, hist(microclim, main = 'Microclimates (°C)', cex.main = cexmain)])
invisible(trends[, hist(Nspp, main = 'Species richness', cex.main = cexmain)])
invisible(trends[, hist(thermal_bias, main = 'Thermal bias (°C)', cex.main = cexmain)])
invisible(trends[, hist(npp, main = 'Net primary productivity', cex.main = cexmain)])
invisible(trends[, hist(veg, main = 'Vegetation index', cex.main = cexmain)])
invisible(trends[, hist(human_bowler, main = 'Human impact score (Bowler)', cex.main = cexmain)])
invisible(trends[, hist(human_venter, main = 'Human impact score (Venter)', cex.main = cexmain)])
invisible(trends[, hist(human_halpern, main = 'Human impact score (Halpern)', cex.main = cexmain)])

```

#### Scaled
```{r histograms scaled, fig.height = 9}
# histograms to examine
cexmain = 0.6
par(mfrow = c(5,4))
invisible(trends[, hist(tempave.sc, main = 'Environmental temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(tempave_metab.sc, main = 'Metabolic temperature (°C)', cex.main = cexmain)])
invisible(trends[, hist(seas.sc, main = 'Seasonality (°C)', cex.main = cexmain)])
invisible(trends[, hist(microclim.sc, main = 'log Microclimates (°C)', cex.main = cexmain)])
invisible(trends[, hist(temptrend.sc, main = 'Temperature trend (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(temptrend_abs.sc, main = 'abs(Temperature trend) (°C/yr)', cex.main = cexmain)])
invisible(trends[, hist(mass.sc, main = 'log Mass (g)', cex.main = cexmain)])
invisible(trends[, hist(speed.sc, main = 'log Speed (km/hr)', cex.main = cexmain)])
invisible(trends[, hist(lifespan.sc, main = 'log Lifespan (yr)', cex.main = cexmain)])
invisible(trends[, hist(consumerfrac.sc, main = 'Consumers (fraction)', cex.main = cexmain)])
invisible(trends[, hist(endothermfrac.sc, main = 'Endotherms (fraction)', cex.main = cexmain)])
invisible(trends[, hist(nspp.sc, main = 'log Species richness', cex.main = cexmain)])
invisible(trends[, hist(thermal_bias.sc, main = 'Thermal bias (°C)', cex.main = cexmain)])
invisible(trends[, hist(npp.sc, main = 'log Net primary productivity', cex.main = cexmain)])
invisible(trends[, hist(veg.sc, main = 'log Vegetation index', cex.main = cexmain)])
invisible(trends[, hist(human_bowler.sc, main = 'log Human impact score (Bowler)', cex.main = cexmain)])
invisible(trends[, hist(human_footprint.sc, main = 'log Human impact score (Venter & Halpern)', cex.main = cexmain)])

```



### Check correlations among variables. Pearson's r is on the lower diagonal.
```{r pairs, fig.height=10, fig.width=10}
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- cor(x, y, use = 'pairwise.complete.obs')
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0(prefix, txt)
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt) #, cex = cex.cor * r)
}
pairs(formula = ~ tempave.sc + tempave_metab.sc + seas.sc + microclim.sc + temptrend.sc + temptrend_abs.sc + mass.sc + speed.sc + lifespan.sc + consumerfrac.sc + endothermfrac.sc + nspp.sc + thermal_bias.sc + npp.sc + veg.sc + human_bowler.sc + human_footprint.sc, data = trends, gap = 1/10, cex = 0.2, col = '#00000022', lower.panel = panel.cor)

```

Mass and lifespan look tightly correlated, but r only 0.56...?
Tempave_metab and lifespan don't look tightly correlated, but r= -0.81 
Tempave_metab and speed don't look tightly correlated, but r= -0.83 
Lifespan and speed don't look tightly correlated, but r = 0.73
